***************
* This script prepares the data for the post-double selection with Lasso
* Author: Daniel Kopp
***************

	* Start from an empty session; maxvar has to be raised while no data is in memory.
	clear
	clear matrix
	clear mata
	set more off	
	set maxvar 11000
	
	* Read the lists of interaction-variable names before loading the (large) main
	* dataset, since this is much faster. Each workbook is imported with -firstrow-,
	* so its first row becomes the variable names and -ds- returns them in r(varlist).
	* The result is stored in a global named after the workbook:
	*   $interactvar, $interactgender, $interactethnicity
	*
	* Notes:
	*  - The macro is copied (`r(varlist)') rather than assigned with "= r(varlist)",
	*    so the list is never passed through the string-expression evaluator and
	*    cannot be affected by string-expression length limits.
	*  - A forward slash is used in the path because a backslash placed directly
	*    before a macro reference would suppress the macro expansion.
	*  - No preserve/restore is needed: nothing is in memory yet, and the main
	*    dataset is loaded with -clear- right after the loop.
	foreach list in interactvar interactgender interactethnicity {
		import excel "Help_files/`list'.xlsx", clear firstrow
		quietly ds
		global `list' `r(varlist)'
	}
	
	* Load the main dataset (replaces the last imported workbook).
	use "data_processed\with_interactions_compr_all_full.dta", clear
	
	****************************
	* Sample restriction
	****************************

	* Apply the restrictions defined in the helper do-file (-run- executes it without echoing output).
	run "Help_files\sample_restrictions_parttime.do"

	* Retain only searches in which the candidate pool has not been restricted to either
	* full- or part-time workers, i.e. drop every observation whose s_workload is set.
	drop if s_workload != .

	****************************
	* Define Globals
	****************************
	
	* Variable-name lists stored as globals. Most of them are expanded by the -keep- and
	* -order- commands further down in this script.
	* NOTE(review): the names are presumably variables of the loaded dataset - verify against the data.

	* has_skills, sk_* and skill_country_region*_general variables
	global skills_dummy_full    "has_skills sk_det_lang0 sk_det_lang1 sk_det_lang2 sk_det_lang3 sk_det_lang4 sk_det_lang_m  sk_exp  sk_education   sk_educ_tert  sk_educ_lehre  sk_educ_weiter sk_softskills sk_it_gen sk_it_deep sk_machines sk_leadership sk_language sk_lgerman sk_lfrench sk_litalian sk_lenglish sk_lchgerman sk_exp_dur_c0 sk_exp_dur_c1 sk_exp_dur_c2 sk_exp_dur_c3 sk_exp_dur_c4 sk_exp_dur_c_m sk_lsoutheuro sk_lother sk_lmiddleeast sk_lbalkan sk_lcentreast sk_lasia skill_country_region1_general skill_country_region2_general skill_country_region3_general skill_country_region4_general skill_country_region5_general skill_country_region6_general skill_country_region8_general skill_country_region9_general" 
	* l_* variables
	global lang_dummy_full 	"l_German l_CH_German l_English l_French  l_Italian l_northwest	l_southeuro l_centreast l_Balkan l_middleeast l_Asia l_Other l_missing"
	
	* other_dummy does not contain geschlecht or workvolume_c1 workvolume_c2 workvolume_c3 workvolume_c_m
	global other_dummy "erfa_s0 erfa_s1 erfa_s2 erfa_s3   mobility_cat0 mobility_cat1 mobility_cat2 mobility_cat3 mobility_cat4 	limcontract0 limcontract1 limcontract99 verfuegbar0 verfuegbar1 verfuegbar99 no_searchvar no_diff_prof_m 	zuletzt_s abschluss_s0 abschluss_s1 abschluss_s2 abschluss_s3 	gesch_kontakt Sonn_Feiertag Schichtarbeit Nachtarbeit Heimarbeit Lehre fuehrer_kat0 fuehrer_kat1 fuehrer_kat2 fuehrer_kat3 fuehrer_kat4 fuehrer_kat5 fuehrer_kat6 	educ_short0 educ_short1 educ_short2 educ_short99  prof_length_1 prof_length_2 prof_length_3 prof_length_4 prof_length_99 user_logged_in"
	* kanton1 ... kanton26
	global kanton "kanton1 kanton2 kanton3 kanton4 kanton5 kanton6 kanton7 kanton8 kanton9 kanton10 kanton11 kanton12 kanton13 kanton14 kanton15 kanton16 kanton17 kanton18 kanton19 kanton20 kanton21 kanton22 kanton23 kanton24 kanton25 kanton26"
	* NOTE(review): $lang_region is defined here but not referenced in the visible remainder of this script
	global lang_region "lang_region1 lang_region2 lang_region3 lang_region4"
	* Three variables whose *_2 counterparts are listed in $squared below
	global other_contin " sk_est_n_word no_diff_prof prof_exp_tot"
	* NOTE(review): $occup_dummy is defined here but not referenced in the visible remainder of this script
	global occup_dummy "bn2000_1_s1 bn2000_1_s2 bn2000_1_s3 bn2000_1_s4 bn2000_1_s5 bn2000_1_s6 bn2000_1_s7 bn2000_1_s8 bn2000_1_s9"
	* The *_2 versions of the variables in $other_contin (presumably squared terms - confirm)
	global squared "sk_est_n_word_2 no_diff_prof_2 prof_exp_tot_2"
	* nation_kat* and name_kat* variables (nation_kat has no category 7 in this list)
	global nation_name_kat "nation_kat0 nation_kat1 nation_kat2 nation_kat3 nation_kat4 nation_kat5 nation_kat6 nation_kat8 nation_kat9 nation_kat_m name_kat0 name_kat1 name_kat2 name_kat3 name_kat4 name_kat5 name_kat6 name_kat7 name_kat8 name_kat_m"
	
		
	*********************************
	* Drop variables we don't need
	*********************************
	
	* We drop all interactions with gender
	drop $interactgender
	
	* Build the list of workvolume interactions ONCE and use it both for -drop- and for
	* removing the names from the interaction globals. Every line of a multi-line
	* command ends in /// so that Stata reads it as a single command.
	
	* Variables interacted as workvolume_c#X<var>
	local wv_partners ///
		erfa_s0 erfa_s1 erfa_s2 erfa_s3 ///
		mobility_cat0 mobility_cat1 mobility_cat2 mobility_cat3 mobility_cat4 ///
		limcontract0 limcontract1 limcontract99 ///
		verfuegbar0 verfuegbar1 verfuegbar99 ///
		no_searchvar no_diff_prof_m zuletzt_s ///
		abschluss_s0 abschluss_s1 abschluss_s2 abschluss_s3 ///
		gesch_kontakt Sonn_Feiertag Schichtarbeit Nachtarbeit Heimarbeit Lehre ///
		fuehrer_kat0 fuehrer_kat1 fuehrer_kat2 fuehrer_kat3 fuehrer_kat4 fuehrer_kat5 fuehrer_kat6 ///
		educ_short0 educ_short1 educ_short2 educ_short99 ///
		prof_length_1 prof_length_2 prof_length_3 prof_length_4 prof_length_99 ///
		user_logged_in ///
		l_German l_CH_German l_English l_French l_Italian l_missing ///
		has_skills ///
		sk_det_lang0 sk_det_lang1 sk_det_lang2 sk_det_lang3 sk_det_lang4 ///
		sk_exp sk_education sk_educ_tert sk_educ_lehre sk_educ_weiter ///
		sk_softskills sk_it_gen sk_it_deep sk_machines sk_leadership ///
		sk_language sk_lgerman sk_lfrench sk_litalian sk_lenglish sk_lchgerman ///
		sk_exp_dur_c0 sk_exp_dur_c1 sk_exp_dur_c2 sk_exp_dur_c3 sk_exp_dur_c4 ///
		bn2000_1_s1 bn2000_1_s2 bn2000_1_s3 bn2000_1_s4 bn2000_1_s5 ///
		bn2000_1_s6 bn2000_1_s7 bn2000_1_s8 bn2000_1_s9 ///
		lang_region1 lang_region2 lang_region3 lang_region4 ///
		sk_est_n_word no_diff_prof prof_exp_tot ///
		sk_est_n_word_2 no_diff_prof_2 prof_exp_tot_2
	
	* Variables interacted as <var>Xworkvolume_c#
	local ethnic_partners ///
		nation_kat0 nation_kat1 nation_kat2 nation_kat3 nation_kat4 ///
		nation_kat5 nation_kat6 nation_kat8 nation_kat9 nation_kat_m ///
		name_kat0 name_kat1 name_kat2 name_kat3 name_kat4 ///
		name_kat5 name_kat6 name_kat7 name_kat8 name_kat_m
	
	* Assemble the full names for workvolume categories 1-3
	local workvolume_interact
	forvalues c = 1/3 {
		foreach v of local wv_partners {
			local workvolume_interact `workvolume_interact' workvolume_c`c'X`v'
		}
		foreach v of local ethnic_partners {
			local workvolume_interact `workvolume_interact' `v'Xworkvolume_c`c'
		}
	}
	
	* The hand-written list this replaces did not contain workvolume_c3Xfuehrer_kat6;
	* it is removed here so that exactly the same set of names is produced.
	* NOTE(review): confirm that this variable is indeed absent from the data and the name lists.
	local not_listed workvolume_c3Xfuehrer_kat6
	local workvolume_interact : list workvolume_interact - not_listed
	
	* Drop all workvolume-interactions:
	drop `workvolume_interact'
	
	* Drop all interactions with workvolume from globals
	* ($all_interactions = $interactvar + $interactethnicity minus the workvolume interactions)
	local all_interactions $interactvar $interactethnicity
	local all_interactions : list all_interactions - workvolume_interact
	global all_interactions `all_interactions'
	
	
	****************************
	* Gen new variables
	****************************	
		
	* Detailed workvolume categories:
	*   0: 100   1: 90-99   2: 80-89   3: 70-79   4: 60-69   5: 50-59   6: below 50
	* Any other value of workvolume (including missing) yields a missing category.
	gen workvolume_det_cat = ///
		cond(workvolume==100,           0, ///
		cond(inrange(workvolume,90,99), 1, ///
		cond(inrange(workvolume,80,89), 2, ///
		cond(inrange(workvolume,70,79), 3, ///
		cond(inrange(workvolume,60,69), 4, ///
		cond(inrange(workvolume,50,59), 5, ///
		cond(workvolume<50,             6, .)))))))
	count if workvolume_det_cat==.		// 0
	
	* One byte dummy per observed category; the dummy is missing where the category is missing
	flevelsof workvolume_det_cat, local(levels)
	foreach cat of local levels {
		gen byte workvolume_det_cat`cat' = (workvolume_det_cat==`cat') if workvolume_det_cat!=.
	}
	
	* Names of the category dummies (assumes all seven categories 0-6 occur in the data)
	global workvolume_det_cat "workvolume_det_cat0 workvolume_det_cat1 workvolume_det_cat2 workvolume_det_cat3 workvolume_det_cat4 workvolume_det_cat5 workvolume_det_cat6 "
	
	* Restrict the dataset to the variables needed for the lasso selection:
	* outcomes, workvolume dummies, gender, controls, interactions and identifiers.
	* NOTE(review): $occup_dummy and $lang_region are not part of this list - confirm this is intended.
	keep contact_button_clicked click_candidate ///
		$workvolume_det_cat geschlecht $nation_name_kat ///
		$other_dummy $lang_dummy_full $skills_dummy_full ///
		$other_contin $kanton $squared ///
		$all_interactions ///
		search_tag tracking_id stes_id kanton rank
		
	********************************************************************************
	*** Draw a random sample of searches (not of observations) and keep only 20% of the searches.
	*** Otherwise the sample is too large for the lasso selection: it is computationally infeasible even on a server with 0.5 terabytes of RAM.
	********************************************************************************

	* Outcome shares before sampling (percentages as recorded by the original author)
	tab contact_button_clicked,m		// 9.46%
	tab click_candidate,m				// 21.75%

	* Draw one uniform number per search (on the first observation of each search_tag),
	* spread it to all observations of that search, and keep searches with a draw above 0.8.
	* The seed makes the draw reproducible.
	set seed 1001
	bysort search_tag: gen n = _n 			if search_tag!=. 
	gen u_ = runiform() 					if n==1 & search_tag!=. 
	bysort search_tag: egen u = mean(u_)	if search_tag!=. 
	* u is missing for observations without a search_tag. Stata treats missing as larger
	* than any number, so a bare "u>0.8" would keep all of them; exclude them explicitly.
	keep if u>0.8 & !missing(u)
	drop u u_ n 

	* Outcome shares after sampling (percentages as recorded by the original author;
	* they may differ slightly if observations without a search_tag were present before)
	tab contact_button_clicked,m		// 9.40%
	tab click_candidate,m				// 21.75%

	* Move the outcomes, workvolume dummies, gender and the control lists to the front of the dataset;
	* all remaining variables follow in their existing order.
	order contact_button_clicked  click_candidate $workvolume_det_cat    geschlecht  $nation_name_kat  $other_dummy $lang_dummy_full $skills_dummy_full  $other_contin  $kanton  $squared 

	* Save the distinct search_tag values of the sampled searches to a separate file;
	* preserve/restore leaves the full sample in memory afterwards.
	preserve
	keep search_tag
	duplicates drop
	save "Misc_files\search_tags_20p_rand_sample_parttime.dta", replace
	restore
	
	* The dataset contains 6568 variables (as recorded by the original author). 10 of them are later
	* excluded from the lasso selection: contact_button_clicked  click_candidate   $workvolume_det_cat    geschlecht 
	
	* Save the sampled dataset used for the lasso selection
	save "data_processed\lasso_20p_rand_sample_parttime.dta", replace
	
	